---
title: "Cleaning FES Youth Studies SEE"
---

# Load

```{r}
# load packages (delegated to the shared helper script)
  source("helper-packages.R")

# read the raw FES Youth Studies SEE file (SPSS .sav, read as latin1)
  fes_se_raw <- read_sav(
    file = '../raw-data/y-multi-fes-youth-studies/se-europe-2018-2019/FES_Youth_Studies_SEE.sav',
    encoding = "latin1"
  )
```

# Clean FES Youth Studies SEE

```{r}
# declare fieldwork dates (one row per country)
#
# Dates are written as ISO 8601 literals (YYYY-MM-DD) so that parsing does not
# depend on the session's LC_TIME locale; month-name formats such as "%b" are
# locale-dependent and silently yield NA when month names are not English.
#
# The values in resp_country_common are the join key used by the cleaning step
# in this chunk (left_join(..., by = "resp_country_common")), so they must equal
# what countryname() returns for each country there.
  fes_se_dates <- 
    tribble(
      ~resp_country_common, ~resp_interview_start_date, ~resp_interview_end_date,
      "Albania",         "2018-01-01", "2018-03-31", # "early 2018", imputed - check if NA
      "Bosnia",          "2018-02-26", "2018-03-11",
      "Bulgaria",        "2018-01-22", "2018-03-06",
      "Croatia",         "2018-01-01", "2018-03-31",
      "Kosovo",          "2017-12-01", "2018-01-31",
      "Montenegro",      "2018-02-10", "2018-03-06",
      "North Macedonia", "2018-02-12", "2018-03-12",
      "Romania",         "2018-01-01", "2018-03-31", # "early 2018", imputed - check if NA
      "Serbia",          "2018-01-22", "2018-03-01",
      "Slovenia",        "2018-01-01", "2018-03-31"
      ) %>% 
    mutate(
      # convert the ISO strings to class Date with an explicit, locale-free format
      resp_interview_start_date = as.Date(resp_interview_start_date, format = "%Y-%m-%d"),
      resp_interview_end_date = as.Date(resp_interview_end_date, format = "%Y-%m-%d"))

# clean
#
# Builds the harmonised respondent-level table from the raw survey file:
#   1. adds survey meta-data and a standardised country name,
#   2. attaches per-country fieldwork dates from fes_se_dates,
#   3. recodes demographics, the social-distance item and religiosity,
#   4. keeps only the harmonised columns (those prefixed "resp_").
#
# Placeholder columns that are entirely missing are created with typed NA
# values (Date / character / numeric) so their class matches what the column
# comments declare, rather than defaulting to logical.
  clean_fes_se <- 
    fes_se_raw %>% 
    mutate(
      
    #########################  
    ####### META-DATA #######  
    #########################      
      
      # source name (character vector, title case)
        resp_source = "Friedrich-Ebert-Stiftung Youth Studies in Southeast Europe",
        
      # round number (character vector, title case); left as an empty string here
        resp_round = "",
      
      # url to dataset source, where publicly available (character vector)
        resp_original_data_url = "bit.ly/3NJfcxY",

      # survey mode (in-person/phone/internet)
        resp_survey_mode = "in-person",    

      # country (character vector; list of countries as written in original source)
      # numeric codes of `Country` are mapped to names; unlisted codes become NA
        resp_country_original = dplyr::recode(
            as.character(Country),
            "1" = "Albania",
            "2" = "Bosnia",
            "3" = "Bulgaria",
            "4" = "Croatia",
            "5" = "Kosovo",
            "6" = "North Macedonia",
            "7" = "Montenegro",
            "8" = "Romania",
            "9" = "Serbia",
            "10" = "Slovenia",
            .default = NA_character_),

      # country (character vector; standardised with countryname())
      # this column is the key for the join with fes_se_dates below
        resp_country_common = countryname(resp_country_original),
        
      # interview date (variable of class Date; if only month given, input 1st of month)
      # no respondent-level date is set here; typed as Date so the column class is correct
        resp_interview_date = as.Date(NA)) %>% 
      # attach country-level fieldwork start/end dates
      left_join(fes_se_dates, by = "resp_country_common") %>% 
      mutate(
   
    #########################  
    ##### DEMOGRAPHICS ######  
    #########################
      
      # respondent's religion (character vector that corresponds to master list)
      # codes 1, 3 and 4 all collapse to "Christian"; see resp_denomination for the split
        resp_religion = 
          dplyr::recode(
            as.character(Q20),
            "0" = NA_character_,
            "1" = "Christian",
            "2" = "Muslim",
            "3" = "Christian",
            "4" = "Christian",
            "5" = "Jewish",
            "6" = "Other religion",
            .default = NA_character_),

      # respondent's denomination (character vector that corresponds to master list)
        resp_denomination = dplyr::recode(
            as.character(Q20),
            "0" = NA_character_,
            "1" = "Orthodox",
            "2" = "Muslim",
            "3" = "Roman Catholic",
            "4" = "Protestant",
            "5" = "Jewish",
            "6" = "Other",
            .default = NA_character_),
    
      # respondent's age (stored as character)
        resp_age = as.character(Age), 
    
      # respondent's education level
      # label format: "<code>. <original wording> [<harmonised category>]"
      # NOTE(review): codes 3 and 4 are secondary-school levels but carry the
      # "[Primary]" tag - confirm against the master list before relying on it
        resp_education_original = dplyr::recode(
            as.character(Q58),
            "1" = "1. No formal education / incompleted primary school [No education]",
            "2" = "2. Primary school [Primary]",
            "3" = "3. Vocational or technical secondary school [Primary]",
            "4" = "4. Secondary school: university-preparatory type [Primary]",
            "5" = "5. University-level education: Bachelor degree or similar [College]",
            "6" = "6. University-level education: Higher than bachelor degree (MA/MSC degree) [College]",
            "7" = "7. Doctoral or post-doctoral degree [College]",
            .default = NA_character_),
      
      # respondent's gender (numeric: female = 1; male = 0; other = NA)
      # as coded below, Sex == 1 is treated as female and Sex == 2 as male
        resp_female = 
          case_when(
            Sex == 1 ~ 1,
            Sex == 2 ~ 0,
            TRUE ~ NA_real_),
      
      # respondent resident in rural (vs urban) area (numeric: rural = 1; urban/semi-urban/peri-urban = 0)
      # Q99 codes 1-2 are treated as rural, 3-4 as not rural; anything else is NA
        resp_rural = dplyr::recode(
            as.character(Q99),
            "1" = 1,
            "2" = 1,
            "3" = 0,
            "4" = 0,
            .default = NA_real_),  
      
    #########################  
    ### SOCIAL DISTANCE 1 ###  
    #########################
    
      # original question number; question text; response options (input above)
      # NOTE(review): "to you trust" in QTEXT may be a typo for "do you trust" -
      # confirm against the questionnaire before editing the stored text
        resp_soc_dist_1_qinfo = "NUM: Q17.7; QTEXT: To what degree to you trust the following people? People of other religions.; ROPTIONS: 1(/2) = Not at all [=1] + (3/4/)5 = Very much [=0]; TARGET: Different religion; TYPE: Trust",
      
      # original response (as character vector)
        resp_soc_dist_1_original = as.character(Q17.7),   

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
      # responses 1-2 -> 1, responses 3-5 -> 0, everything else -> NA
        resp_soc_dist_1_bin_recode = 
          case_when(
            Q17.7 %in% c(1:2) ~ 1,
            Q17.7 %in% c(3:5) ~ 0,
            TRUE ~ NA_real_),
    
    ############################  
    ### GENERAL SOCIAL TRUST ###  
    ############################
    
      # original question number; question text; response options (input above)
        resp_gentrust_qinfo = NA_character_, # checked; no general trust question

      # original response (as character vector)
        resp_gentrust_original = NA_character_,       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_gentrust_bin_recode = NA_real_,    
    
    #########################  
    ##### RELIGIOSITY #######
    #########################
    
      # original question number; question text; response options (input above)
        resp_religiosity_qinfo = "NUM: Q1.14; QTEXT: How often do you engage in praying?; ROPTIONS: 1 = Never + 2 = Rarely (once a month or less) + 3 = Sometimes (several times in a month) + 4 = Often (at least once a week) + 5 = Very often (every day or almost every day)",
  
      # original response (as numeric vector, with non-substantive responses coded as NA_real_)
      # codes 98 and 99 are set to missing; all other values pass through unchanged
        resp_religiosity_original = 
          dplyr::recode(
            as.numeric(Q1.14),
            `98` = NA_real_,
            `99` = NA_real_),       

      # recode (numeric: scaled 0-1, where 1 is more religious)
      # maps the 1-5 response scale linearly onto 0-1
        resp_religiosity_recode = (resp_religiosity_original - 1)/4

    ) %>% 
    # keep only the harmonised output columns
    select(starts_with("resp_"))
```

# Save data

```{r}
  # write the cleaned table to the cleaned-data folder as an .rds file
  saveRDS(
    object = clean_fes_se,
    file = "../cleaned-data/y-2-multi-fes-youth-studies-see.rds"
  )
```
